setwd("~/HomeWork/Practice")
devtools::install_github("kassambara/datarium")
## Skipping install of 'datarium' from a github remote, the SHA1 (f9f9b3a6) has not changed since last install.
## Use `force = TRUE` to force installation
data("marketing", package="datarium")
head(marketing)
data("swiss")
head(swiss)
data("Boston", package="MASS")
head(Boston)
Loading required R packages
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
theme_set(theme_bw())
Preparing the data
# Load the data
data("marketing", package = "datarium")
# Inspect the data
sample_n(marketing,3)
Split the data into training and test set
set.seed(123)
training.samples <- marketing$sales %>% createDataPartition(p=0.8, list = FALSE)
train.data <- marketing[training.samples,]
test.data <- marketing[-training.samples,]
summary(train.data)
## youtube facebook newspaper sales
## Min. : 0.84 Min. : 0.00 Min. : 0.36 Min. : 1.92
## 1st Qu.: 85.95 1st Qu.:11.61 1st Qu.: 19.11 1st Qu.:12.48
## Median :196.08 Median :25.44 Median : 35.16 Median :15.48
## Mean :179.24 Mean :27.51 Mean : 39.07 Mean :16.77
## 3rd Qu.:264.54 3rd Qu.:43.89 3rd Qu.: 55.02 3rd Qu.:20.88
## Max. :355.68 Max. :59.52 Max. :136.80 Max. :32.40
dim(train.data)
## [1] 162 4
dim(test.data)
## [1] 38 4
Computing linear regression in R
model <- lm(sales ~., data = train.data)
summary(model)
##
## Call:
## lm(formula = sales ~ ., data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4122 -1.1101 0.3475 1.4218 3.4987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.391883 0.440622 7.698 1.41e-12 ***
## youtube 0.045574 0.001592 28.630 < 2e-16 ***
## facebook 0.186941 0.009888 18.905 < 2e-16 ***
## newspaper 0.001786 0.006773 0.264 0.792
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.119 on 158 degrees of freedom
## Multiple R-squared: 0.8902, Adjusted R-squared: 0.8881
## F-statistic: 426.9 on 3 and 158 DF, p-value: < 2.2e-16
# Make predictions
predictions <- model %>% predict(test.data)
# Model Performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$sales)
## [1] 1.590096
# (b) R-square
R2(predictions, test.data$sales)
## [1] 0.9380644
data("Boston", package = "MASS")
set.seed(123)
head(Boston)
training1.samples <- Boston$medv %>% createDataPartition(p=0.8, list = FALSE)
train1.data <- Boston[training.samples,]
test1.data <- Boston[-training.samples, ]
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Let’s start by fitting a linear regression model:
model.lm <- lm(medv~lstat, data = train1.data)
summary(model.lm)
##
## Call:
## lm(formula = medv ~ lstat, data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.889 -3.805 -1.656 2.059 19.972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.98512 0.88151 38.55 <2e-16 ***
## lstat -0.88925 0.06524 -13.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.548 on 160 degrees of freedom
## Multiple R-squared: 0.5373, Adjusted R-squared: 0.5344
## F-statistic: 185.8 on 1 and 160 DF, p-value: < 2.2e-16
prediction <- model.lm %>% predict(test1.data)
data.frame(RMSE = RMSE(prediction, test1.data$medv),
R2 = R2(prediction, test1.data$medv))
Let’s visualize the data
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y ~ x)
medv = b0 + b1*lstat + b2*lstat^2
mod2 <- lm(medv ~ lstat + I(lstat*lstat), data = train1.data)
summary(mod2)
##
## Call:
## lm(formula = medv ~ lstat + I(lstat * lstat), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3393 -3.2427 -0.5934 2.0975 16.7311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.432475 1.274260 34.084 < 2e-16 ***
## lstat -2.520720 0.189206 -13.323 < 2e-16 ***
## I(lstat * lstat) 0.053205 0.005921 8.987 7.09e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.532 on 159 degrees of freedom
## Multiple R-squared: 0.6932, Adjusted R-squared: 0.6893
## F-statistic: 179.6 on 2 and 159 DF, p-value: < 2.2e-16
prediction2 <- mod2 %>% predict(test1.data)
data.frame(
RMSE = RMSE(prediction2, test1.data$medv),
R2 = R2(prediction2, test1.data$medv)
)
mod22 <- lm(medv ~ poly(lstat,2), data = train1.data)
summary(mod22)
##
## Call:
## lm(formula = medv ~ poly(lstat, 2), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3393 -3.2427 -0.5934 2.0975 16.7311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.5407 0.3561 66.116 < 2e-16 ***
## poly(lstat, 2)1 -75.6187 4.5318 -16.686 < 2e-16 ***
## poly(lstat, 2)2 40.7253 4.5318 8.987 7.09e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.532 on 159 degrees of freedom
## Multiple R-squared: 0.6932, Adjusted R-squared: 0.6893
## F-statistic: 179.6 on 2 and 159 DF, p-value: < 2.2e-16
prediction22 <- mod22 %>% predict(test1.data)
data.frame(
RMSE = RMSE(prediction22, test1.data$medv),
R2 = R2(prediction22, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y ~ poly(x,2))
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y ~ (x + I(x*x)))
mod6 <- lm(medv ~ poly(lstat, 6), data = train1.data)
summary(mod6)
##
## Call:
## lm(formula = medv ~ poly(lstat, 6), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.4412 -2.3739 -0.7595 1.7344 15.9725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.5407 0.3304 71.245 < 2e-16 ***
## poly(lstat, 6)1 -75.6187 4.2055 -17.981 < 2e-16 ***
## poly(lstat, 6)2 40.7253 4.2055 9.684 < 2e-16 ***
## poly(lstat, 6)3 -16.9822 4.2055 -4.038 8.45e-05 ***
## poly(lstat, 6)4 12.1006 4.2055 2.877 0.00458 **
## poly(lstat, 6)5 -9.1610 4.2055 -2.178 0.03089 *
## poly(lstat, 6)6 -2.2956 4.2055 -0.546 0.58595
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.206 on 155 degrees of freedom
## Multiple R-squared: 0.7424, Adjusted R-squared: 0.7324
## F-statistic: 74.45 on 6 and 155 DF, p-value: < 2.2e-16
As we can see in the model summary above, the polynomial term beyond degree 5 is not significant.
Let’s visualize the model:
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y ~ poly(x,6))
mod5 <- lm(medv ~ poly(lstat, 5), data = train1.data)
summary(mod5)
##
## Call:
## lm(formula = medv ~ poly(lstat, 5), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.0370 -2.4259 -0.7941 1.6414 16.1361
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.5407 0.3297 71.406 < 2e-16 ***
## poly(lstat, 5)1 -75.6187 4.1961 -18.021 < 2e-16 ***
## poly(lstat, 5)2 40.7253 4.1961 9.706 < 2e-16 ***
## poly(lstat, 5)3 -16.9822 4.1961 -4.047 8.14e-05 ***
## poly(lstat, 5)4 12.1006 4.1961 2.884 0.00448 **
## poly(lstat, 5)5 -9.1610 4.1961 -2.183 0.03051 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.196 on 156 degrees of freedom
## Multiple R-squared: 0.7419, Adjusted R-squared: 0.7336
## F-statistic: 89.69 on 5 and 156 DF, p-value: < 2.2e-16
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y ~ poly(x,5))
mod.log <- lm(medv ~ log(lstat), data = train1.data)
summary(mod.log)
##
## Call:
## lm(formula = medv ~ log(lstat), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.610 -2.975 -1.048 1.886 17.093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.2130 1.4077 35.67 <2e-16 ***
## log(lstat) -11.5918 0.5928 -19.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.43 on 160 degrees of freedom
## Multiple R-squared: 0.705, Adjusted R-squared: 0.7032
## F-statistic: 382.4 on 1 and 160 DF, p-value: < 2.2e-16
prediction.log <- mod.log %>% predict(test1.data)
data.frame(
RMSE = RMSE(prediction.log, test1.data$medv),
R2 = R2(prediction.log, test1.data$medv)
)
ggplot(data = train1.data, aes(lstat, medv))+
geom_point() +
stat_smooth(method = lm, formula = y~log(x))
library(splines)
knots <- quantile(train1.data$lstat, p = c(0.25, 0.5, 0.75))
model.spline <- lm(medv ~ bs(lstat, knots = knots), data = train1.data)
summary(model.spline)
##
## Call:
## lm(formula = medv ~ bs(lstat, knots = knots), data = train1.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.0823 -2.0512 -0.6894 1.8034 15.8245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.032 2.643 16.658 < 2e-16 ***
## bs(lstat, knots = knots)1 -1.127 4.160 -0.271 0.787
## bs(lstat, knots = knots)2 -22.744 2.831 -8.034 2.23e-13 ***
## bs(lstat, knots = knots)3 -19.776 3.102 -6.376 1.98e-09 ***
## bs(lstat, knots = knots)4 -33.442 3.570 -9.366 < 2e-16 ***
## bs(lstat, knots = knots)5 -27.995 4.901 -5.713 5.53e-08 ***
## bs(lstat, knots = knots)6 -29.583 4.500 -6.574 7.08e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.097 on 155 degrees of freedom
## Multiple R-squared: 0.7555, Adjusted R-squared: 0.746
## F-statistic: 79.82 on 6 and 155 DF, p-value: < 2.2e-16
prediction.spline <- model.spline %>% predict(test1.data)
## Warning in bs(lstat, degree = 3L, knots = c(`25%` = 6.5425, `50%` =
## 10.285, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
data.frame(
RMSE = RMSE(prediction.spline, test1.data$medv),
R2 = R2(prediction.spline, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = lm, formula = y~bs(x,df=3))
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-27. For overview type 'help("mgcv-package")'.
model.additive <- gam(medv ~ s(lstat), data = train1.data)
summary(model.additive)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## medv ~ s(lstat)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.5407 0.3261 72.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(lstat) 5.825 7.01 65.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.739 Deviance explained = 74.9%
## GCV = 17.99 Scale est. = 17.232 n = 162
prediction.additive <- model.additive %>% predict(test1.data)
data.frame(
RMSE = RMSE(prediction.additive, test1.data$medv),
R2 = R2(prediction.additive, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) +
geom_point() +
stat_smooth(method = gam, formula = y ~ s(x))
In regression models, the most commonly used evaluation metrics include:
R-squared (R2), which is the proportion of variation in the outcome that is explained by the predictor variables. In multiple regression models, R2 corresponds to the squared correlation between the observed outcome values and the values predicted by the model. The higher the R-squared, the better the model.
Root Mean Squared Error (RMSE), which measures the average error the model makes when predicting the outcome for an observation. Mathematically, the RMSE is the square root of the mean squared error (MSE), which is the average squared difference between the observed outcome values and the values predicted by the model. So, MSE = mean((observeds - predicteds)^2) and RMSE = sqrt(MSE). The lower the RMSE, the better the model.
Residual Standard Error (RSE), also known as the model sigma, is a variant of the RMSE adjusted for the number of predictors in the model. The lower the RSE, the better the model. In practice, the difference between RMSE and RSE is very small, particularly for large multivariate data.
Mean Absolute Error (MAE), which, like the RMSE, measures the prediction error. Mathematically, it is the average absolute difference between observed and predicted outcomes, MAE = mean(abs(observeds - predicteds)). MAE is less sensitive to outliers than RMSE.
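To make these formulas concrete, here is a quick by-hand check; a sketch assuming the marketing model’s test-set predictions (predictions) and test.data from earlier are still in the workspace:
# Compute RMSE and MAE by hand for the marketing model above
err <- test.data$sales - predictions
sqrt(mean(err^2))  # RMSE, should match RMSE(predictions, test.data$sales)
mean(abs(err))     # MAE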
The problem with the above metrics is that they are sensitive to the inclusion of additional variables in the model, even if those variables don’t contribute significantly to explaining the outcome. Put another way, including additional variables in the model will always increase the R2 and reduce the RMSE. So, we need a more robust metric to guide the model choice.
Concerning R2, there is an adjusted version, called adjusted R-squared, which adjusts the R2 for having too many variables in the model.
Additionally, there are four other important metrics - AIC, AICc, BIC and Mallows Cp - that are commonly used for model evaluation and selection. These are unbiased estimates of the model prediction error MSE. The lower these metrics, the better the model.
AIC stands for Akaike’s Information Criterion, a metric developed by the Japanese statistician Hirotugu Akaike in 1970. The basic idea of AIC is to penalize the inclusion of additional variables in a model. It adds a penalty that increases the error when including additional terms. The lower the AIC, the better the model.
AICc is a version of AIC corrected for small sample sizes.
BIC (or Bayesian Information Criterion) is a variant of AIC with a stronger penalty for including additional variables in the model.
Mallows Cp: a variant of AIC developed by Colin Mallows.
Generally, the most commonly used metrics for measuring regression model quality and comparing models are: adjusted R2, AIC, BIC and Cp.
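As a quick sanity check of what AIC computes, a sketch using the marketing lm fitted earlier (model): for lm, AIC = 2k - 2*logLik, where k counts the coefficients plus the error variance.
# AIC by hand for an lm fit
k <- length(coef(model)) + 1           # coefficients plus sigma
2 * k - 2 * as.numeric(logLik(model))  # same value as AIC(model)
AIC(model)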
In the following sections, we’ll show you how to compute these metrics.
library(modelr)
library(broom)
##
## Attaching package: 'broom'
## The following object is masked from 'package:modelr':
##
## bootstrap
# Load the data
data("swiss")
# Inspect the data
sample_n(swiss,3)
We start by creating two models:
model.swiss.1 <- lm(Fertility~., data = swiss)
summary(model.swiss.1)
##
## Call:
## lm(formula = Fertility ~ ., data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2743 -5.2617 0.5032 4.1198 15.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.91518 10.70604 6.250 1.91e-07 ***
## Agriculture -0.17211 0.07030 -2.448 0.01873 *
## Examination -0.25801 0.25388 -1.016 0.31546
## Education -0.87094 0.18303 -4.758 2.43e-05 ***
## Catholic 0.10412 0.03526 2.953 0.00519 **
## Infant.Mortality 1.07705 0.38172 2.822 0.00734 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.671
## F-statistic: 19.76 on 5 and 41 DF, p-value: 5.594e-10
data.frame(
R2 = rsquare(model.swiss.1, data = swiss),
RMSE = rmse(model.swiss.1, data = swiss),
MAE = mae(model.swiss.1, data = swiss),
AIC = AIC(model.swiss.1),
BIC = BIC(model.swiss.1)
)
model.swiss.2 <- lm(Fertility~. - Examination, data = swiss)
summary(model.swiss.2)
##
## Call:
## lm(formula = Fertility ~ . - Examination, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.6765 -6.0522 0.7514 3.1664 16.1422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.10131 9.60489 6.466 8.49e-08 ***
## Agriculture -0.15462 0.06819 -2.267 0.02857 *
## Education -0.98026 0.14814 -6.617 5.14e-08 ***
## Catholic 0.12467 0.02889 4.315 9.50e-05 ***
## Infant.Mortality 1.07844 0.38187 2.824 0.00722 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared: 0.6993, Adjusted R-squared: 0.6707
## F-statistic: 24.42 on 4 and 42 DF, p-value: 1.717e-10
data.frame(
R2 = rsquare(model.swiss.2, data = swiss),
RMSE = rmse(model.swiss.2, data = swiss),
MAE = mae(model.swiss.2, data = swiss),
AIC = AIC(model.swiss.2),
BIC = BIC(model.swiss.2)
)
glance(model.swiss.1)
glance(model.swiss.2)
Manual computation of R2, RMSE and MAE
swiss %>% add_predictions(model.swiss.1) %>% summarise(
  R2 = cor(Fertility, pred)^2,
  MSE = mean((Fertility - pred)^2),  # square the errors before averaging
  RMSE = sqrt(MSE),
  MAE = mean(abs(Fertility - pred))
)
glance(model.swiss.1) %>% select(adj.r.squared, sigma, AIC, BIC, p.value)
glance(model.swiss.2) %>% select(adj.r.squared, sigma, AIC, BIC, p.value)
From the output above, it can be seen that:
The two models have exactly the same adjusted R2 (0.67), meaning that they are equivalent in explaining the outcome, here the fertility score. Additionally, they have the same residual standard error (RSE or sigma = 7.17). However, model 2 is simpler than model 1 because it incorporates fewer variables. All else being equal, the simpler model is preferred in statistics.
The AIC and the BIC of model 2 are lower than those of model 1. In model comparison strategies, the model with the lowest AIC and BIC scores is preferred.
Finally, the F-statistic p-value of model 2 is lower than that of model 1. This means that model 2 is statistically more significant than model 1, which is consistent with the above conclusion.
Note that the RMSE and the RSE are measured on the same scale as the outcome variable. Dividing the RSE by the average value of the outcome variable gives the prediction error rate, which should be as small as possible:
sigma(model.swiss.1)/mean(swiss$Fertility)
## [1] 0.1021544
In our example, the average prediction error rate is about 10%.
Cross-validation refers to a set of methods for measuring the performance of a given predictive model on new test data sets.
The basic idea behind cross-validation techniques consists of dividing the data into two sets: one set used to train the model, and a second, held-out set used to test it.
Cross-validation is also known as a resampling method because it involves fitting the same statistical method multiple times using different subsets of the data.
There are different cross-validation methods for assessing model performance; we cover the following approaches: the validation set approach, leave-one-out cross-validation (LOOCV), k-fold cross-validation and the bootstrap.
After building a model, we are interested in determining the accuracy of this model in predicting the outcome for new, unseen observations not used to build the model. In other words, we want to estimate the prediction error.
To do so, the basic strategy is to build the model on one subset of the data and estimate the prediction error on the subset held out.
Briefly, cross-validation algorithms can be summarized as follows: reserve a subset of the data, build the model on the remaining data, test the model on the reserved subset, and record the prediction error.
The following sections describe the different cross-validation techniques.
The validation set approach consists of randomly splitting the data into two sets: one set is used to train the model and the other is used to test it.
The process works as follows: build the model on the training set, apply it to the test set, and quantify the prediction error.
The example below splits the swiss data set so that 80% is used for training a linear regression model and 20% is used to evaluate the model performance.
# Split the data into training and test set
set.seed(123)
training.samples <- swiss$Fertility %>%
createDataPartition(p=0.8, list = FALSE)
train.swiss.data <- swiss[training.samples, ]
test.swiss.data <- swiss[-training.samples, ]
# Build the model
model.swiss.lm <- lm(Fertility ~ ., data = train.swiss.data)
# Make predictions and compute the R2, RMSE and MAE
predictions.swiss.lm <- model.swiss.lm %>% predict(test.swiss.data)
data.frame(
R2 = R2(predictions.swiss.lm, test.swiss.data$Fertility),
RMSE = RMSE(predictions.swiss.lm, test.swiss.data$Fertility),
MAE = MAE(predictions.swiss.lm, test.swiss.data$Fertility)
)
When comparing two models, the one that produces the lowest test sample RMSE is the preferred model.
The RMSE and the MAE are measured on the same scale as the outcome variable. Dividing the RMSE by the average value of the outcome variable gives the prediction error rate, which should be as small as possible:
RMSE(predictions.swiss.lm, test.swiss.data$Fertility)/mean(test.swiss.data$Fertility)
## [1] 0.1281514
Note that the validation set method is only useful when you have a large data set that can be partitioned. A disadvantage is that we build the model on only a fraction of the data set, possibly leaving out some interesting information about the data and leading to higher bias. Therefore, the test error rate can be highly variable, depending on which observations are included in the training set and which observations are included in the validation set.
This method works as follows:
1. Leave out one data point and build the model on the rest of the data set.
2. Test the model against the data point left out at step 1 and record the test error associated with the prediction.
3. Repeat the process for all data points.
4. Compute the overall prediction error by taking the average of all the test error estimates recorded at step 3.
Practical example in R using the caret package:
# Define training control
train.control <- trainControl(method = "LOOCV")
# Train the model
model.loocv <- train(Fertility ~., data = swiss, method = "lm", trControl = train.control)
# Summarize the results
print(model.loocv)
## Linear Regression
##
## 47 samples
## 5 predictor
##
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation
## Summary of sample sizes: 46, 46, 46, 46, 46, 46, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 7.738618 0.6128307 6.116021
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
The advantage of the LOOCV method is that we make use of all data points, reducing potential bias.
However, the process is repeated as many times as there are data points, resulting in a longer execution time when n is extremely large.
Additionally, we test the model performance against a single data point at each iteration. This might result in higher variation in the prediction error if some data points are outliers. So, we need a good ratio of testing data points, a solution provided by the k-fold cross-validation method.
The k-fold cross-validation method evaluates the model performance on different subsets of the training data and then calculates the average prediction error rate. The algorithm is as follows:
1. Randomly split the data set into k folds (subsets).
2. Reserve one fold and build the model on the remaining k - 1 folds.
3. Test the model on the reserved fold and record the prediction error.
4. Repeat until each fold has served as the test set, then average the k recorded errors.
K-fold cross-validation (CV) is a robust method for estimating the accuracy of a model. The most obvious advantage of k-fold CV compared to LOOCV is computational. A less obvious but potentially more important advantage of k-fold CV is that it often gives more accurate estimates of the test error rate than does LOOCV.
In practice, one typically performs k-fold cross-validation using k = 5 or k = 10, as these values have been shown empirically to yield test error rate estimates that suffer neither from excessively high bias nor from very high variance.
The following example uses 10-fold cross validation to estimate the prediction error. Make sure to set seed for reproducibility.
# Define training control
set.seed(123)
train.control <- trainControl(method = "cv", number = 10)
# Train the model
model.cv <- train(Fertility~., data = swiss, method = "lm", trControl = train.control)
# Summarize the results
print(model.cv)
## Linear Regression
##
## 47 samples
## 5 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 43, 42, 42, 41, 43, 41, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 7.379432 0.7512317 6.032731
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
The process of splitting the data into k folds can be repeated a number of times; this is called repeated k-fold cross-validation.
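For illustration, a minimal sketch using caret’s "repeatedcv" resampling method (3 repeats is an arbitrary choice here):
# 10-fold cross-validation, repeated 3 times
set.seed(123)
train.control.rep <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
model.repeatedcv <- train(Fertility ~ ., data = swiss, method = "lm", trControl = train.control.rep)
print(model.repeatedcv)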
The bootstrap method is used to quantify the uncertainty associated with a given statistical estimator or with a predictive model.
It consists of randomly selecting a sample of n observations from the original data set. This subset, called the bootstrap data set, is then used to evaluate the model.
This procedure is repeated a large number of times and the standard error of the bootstrap estimate is then calculated. The results provide an indication of the variance of the model performance.
Note that the sampling is performed with replacement, which means that the same observation can occur more than once in the bootstrap data set; the sketch below draws one such resample by hand.
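# A quick sketch of a single bootstrap resample: sample n row indices with
# replacement, so some rows repeat and others are left out
n <- nrow(swiss)
boot.sample <- swiss[sample(n, n, replace = TRUE), ]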
# Define training control
train.control.bootstrap <- trainControl(method = "boot", number = 100)
model.bootstrap <- train(Fertility ~., data = swiss, method = "lm", trControl = train.control.bootstrap)
# Summarize the results
print(model.bootstrap)
## Linear Regression
##
## 47 samples
## 5 predictor
##
## No pre-processing
## Resampling: Bootstrapped (100 reps)
## Summary of sample sizes: 47, 47, 47, 47, 47, 47, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 8.432357 0.6048287 6.791066
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
The bootstrap approach can be used to quantify the uncertainty (or standard error) associated with any given statistical estimator.
For example, you might want to estimate the accuracy of the linear regression beta coefficients using the bootstrap method.
The different steps are as follows: create a function that returns the statistic of interest (here, the regression coefficients), then use the boot() function [boot package] to apply it repeatedly to bootstrap resamples.
We start by creating a function that returns the regression model coefficients:
model_coef <- function(data, index) {
coef(lm(Fertility~., data = data, subset = index))
}
model_coef(swiss, 1:47)
## (Intercept) Agriculture Examination Education
## 66.9151817 -0.1721140 -0.2580082 -0.8709401
## Catholic Infant.Mortality
## 0.1041153 1.0770481
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
##
## melanoma
boot(swiss, model_coef, 500)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = swiss, statistic = model_coef, R = 500)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 66.9151817 0.572138874 10.84264891
## t2* -0.1721140 -0.004654338 0.06456320
## t3* -0.2580082 -0.027889510 0.25405701
## t4* -0.8709401 0.002319556 0.21736743
## t5* 0.1041153 -0.002166432 0.03067023
## t6* 1.0770481 0.008581232 0.43205390
In the output above,
the original column contains the regression coefficients, and the associated standard errors are given in the std. error column;
t1* corresponds to the intercept, t2* corresponds to Agriculture, and so on.
For example, it can be observed that the standard error (SE) of the regression coefficient associated with Agriculture is about 0.06.
Note that the standard errors measure the variability/accuracy of the beta coefficients. They can be used to compute the confidence intervals of the coefficients.
For example, the 95% confidence interval for a given coefficient b is defined as b +/- 1.96*SE(b), where SE(b) is the standard error of b.
That is, there is approximately a 95% chance that the interval [-0.306, -0.036] will contain the true value of the Agriculture coefficient.
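As a quick sketch, the interval can be computed from the bootstrap output above; the exact bounds vary with the bootstrap run and with which SE (bootstrap or lm) is used:
# 95% CI for the Agriculture coefficient from the bootstrap SE reported above
b  <- -0.1721140
se <- 0.06456320
b + c(-1.96, 1.96) * se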
Using the standard lm() function gives slightly different standard errors, because the linear model makes some assumptions about the data:
summary(lm(Fertility ~., data = swiss))$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.9151817 10.70603759 6.250229 1.906051e-07
## Agriculture -0.1721140 0.07030392 -2.448142 1.872715e-02
## Examination -0.2580082 0.25387820 -1.016268 3.154617e-01
## Education -0.8709401 0.18302860 -4.758492 2.430605e-05
## Catholic 0.1041153 0.03525785 2.952969 5.190079e-03
## Infant.Mortality 1.0770481 0.38171965 2.821568 7.335715e-03
The bootstrap approach does not rely on any of these assumptions made by the linear model, so it likely gives a more accurate estimate of the coefficients’ standard errors than the summary() function does.
library(tidyverse)
library(caret)
library(leaps)
The R function regsubsets() [leaps package] can be used to identify different best models of different sizes.
You need to specify the option nvmax, which represents the maximum number of predictors to incorporate in the model. For example, if nvmax = 5, the function returns up to the best five-variable model; that is, it returns the best 1-variable model, the best 2-variable model, ..., and the best 5-variable model.
In our example, we have only 5 predictor variables in the data, so we’ll use nvmax = 5.
models.regsubsets <- regsubsets(Fertility~., data= swiss, nvmax = 5)
summary(models.regsubsets)
## Subset selection object
## Call: regsubsets.formula(Fertility ~ ., data = swiss, nvmax = 5)
## 5 Variables (and intercept)
## Forced in Forced out
## Agriculture FALSE FALSE
## Examination FALSE FALSE
## Education FALSE FALSE
## Catholic FALSE FALSE
## Infant.Mortality FALSE FALSE
## 1 subsets of each size up to 5
## Selection Algorithm: exhaustive
## Agriculture Examination Education Catholic Infant.Mortality
## 1 ( 1 ) " " " " "*" " " " "
## 2 ( 1 ) " " " " "*" "*" " "
## 3 ( 1 ) " " " " "*" "*" "*"
## 4 ( 1 ) "*" " " "*" "*" "*"
## 5 ( 1 ) "*" "*" "*" "*" "*"
The summary() function returns some metrics (adjusted R2, Cp and BIC) allowing us to identify the best overall model, where best is defined as the model that maximizes the adjusted R2 and minimizes the prediction error.
The adjusted R2 represents the proportion of variation in the outcome that is explained by variation in the predictor values. The higher the adjusted R2, the better the model.
The best model, according to each of these metrics, can be extracted as follows:
res.sum <- summary(models.regsubsets)
data.frame(
Adj.R2 = which.max(res.sum$adjr2),
CP = which.min(res.sum$cp),
BIC = which.min(res.sum$bic)
)
There is no single correct solution to model selection; each of these criteria will lead to slightly different models.
Remember that,
“all models are wrong, some models are useful”
So, we have different “best” models depending on which metric we consider. We need additional strategies.
Note also that the adjusted R2, BIC and Cp are calculated on the training data that were used to fit the model. This means that model selection using these metrics is possibly subject to overfitting and may not perform as well when applied to new data.
A more rigorous approach is to select a model based on the prediction error computed on new test data using k-fold cross-validation techniques.
Here, we’ll follow this procedure: extract the model formula for each model size returned by regsubsets(), compute the cross-validation (CV) error for each formula, and select the model size that minimizes the CV error.
We start by defining two helper functions:
# id: model id
# object: regsubsets object
# data: data used to fit regsubsets
get_model_formula <- function(id, object){
  models <- summary(object)$which[id, -1]
  # Get the outcome variable
  form <- as.formula(object$call[[2]])
  outcome <- all.vars(form)[1]
  # Get the model predictors
  predictors <- names(which(models == TRUE))
  predictors <- paste(predictors, collapse = "+")
  # Build the model formula
  as.formula(paste(outcome, "~", predictors))
}
get_model_formula(3,models.regsubsets)
## Fertility ~ Education + Catholic + Infant.Mortality
## <environment: 0x7ff6b7458560>
get_cv_error <- function (model.formula, data){
set.seed(1)
train.control <- trainControl(method = "cv", number = 5)
cv <- train(model.formula, data = data, method = "lm", trControl = train.control)
cv$results$RMSE
}
Use the helper functions defined above to compute the prediction error:
# Compute cross-validation error
model.ids <- 1:5
cv.errors <- map(model.ids, get_model_formula, models.regsubsets) %>%
map(get_cv_error, data = swiss) %>%
unlist()
cv.errors
## [1] 9.422610 8.452344 7.926889 7.678508 7.923860
# Select the model that minimize the CV error
which.min(cv.errors)
## [1] 4
It can be seen that the model with 4 variables is the best model: it has the lowest prediction error. The regression coefficients of this model can be extracted as follows:
coef(models.regsubsets, 4)
## (Intercept) Agriculture Education Catholic
## 62.1013116 -0.1546175 -0.9802638 0.1246664
## Infant.Mortality
## 1.0784422
Forward selection: starts with no predictors in the model, iteratively adds the most contributive predictor, and stops when the improvement is no longer statistically significant.
Backward selection (or backward elimination): starts with all predictors in the model (the full model), iteratively removes the least contributive predictors, and stops when all remaining predictors are statistically significant.
Stepwise selection (or sequential replacement): a combination of forward and backward selection. You start with no predictors, then sequentially add the most contributive predictors (like forward selection); after adding each new variable, you remove any variables that no longer provide an improvement in the model fit. A sketch of the forward variant is shown immediately below, followed by the stepwise fit.
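For the forward variant, stepAIC() needs an explicit scope to grow into. A minimal sketch, assuming the swiss data is loaded as above (null.model and forward.model are illustrative names):
library(MASS)
# Start from the intercept-only model and let stepAIC() add predictors
null.model <- lm(Fertility ~ 1, data = swiss)
forward.model <- stepAIC(null.model, direction = "forward",
  scope = list(lower = ~ 1,
               upper = ~ Agriculture + Examination + Education + Catholic + Infant.Mortality),
  trace = FALSE)
summary(forward.model)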
library(tidyverse)
library(caret)
library(leaps)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Fit the full model
full.model <- lm(Fertility ~., data = swiss)
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
##
## Call:
## lm(formula = Fertility ~ Agriculture + Education + Catholic +
## Infant.Mortality, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.6765 -6.0522 0.7514 3.1664 16.1422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.10131 9.60489 6.466 8.49e-08 ***
## Agriculture -0.15462 0.06819 -2.267 0.02857 *
## Education -0.98026 0.14814 -6.617 5.14e-08 ***
## Catholic 0.12467 0.02889 4.315 9.50e-05 ***
## Infant.Mortality 1.07844 0.38187 2.824 0.00722 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared: 0.6993, Adjusted R-squared: 0.6707
## F-statistic: 24.42 on 4 and 42 DF, p-value: 1.717e-10
Ridge regression shrinks the regression coefficients, so that variables with minor contribution to the outcome have their coefficients close to zero.
The shrinkage of the coefficients is achieved by penalizing the regression model with a penalty term called the L2-norm, which is the sum of the squared coefficients.
The amount of the penalty can be fine-tuned using a constant called lambda. Selecting a good value for lambda is critical.
When lambda = 0, the penalty term has no effect, and ridge regression produces the classical least squares coefficients. However, as lambda increases to infinity, the impact of the shrinkage penalty grows, and the ridge regression coefficients get close to zero.
Note that, in contrast to ordinary least squares regression, ridge regression is highly affected by the scale of the predictors. Therefore, it is better to standardize (i.e., scale) the predictors before applying ridge regression, so that all the predictors are on the same scale.
The standardization of a predictor x can be achieved using the formula x' = x/sd(x), where sd(x) is the standard deviation of x. The consequence is that all standardized predictors have a standard deviation of one, allowing the final fit not to depend on the scale on which the predictors are measured.
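A sketch of this standardization, assuming x is a numeric predictor matrix; note that glmnet() does this internally by default (standardize = TRUE), so it is rarely needed by hand:
# Divide each column by its standard deviation
x.scaled <- sweep(x, 2, apply(x, 2, sd), "/")
apply(x.scaled, 2, sd)  # every column now has standard deviation 1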
One important advantage of ridge regression is that it still performs well, compared to the ordinary least squares method, in situations where you have large multivariate data with the number of predictors (p) larger than the number of observations (n).
One disadvantage of ridge regression is that it includes all the predictors in the final model, unlike the stepwise regression methods, which generally select models that involve a reduced set of variables.
Ridge regression shrinks the coefficients towards zero, but it will not set any of them exactly to zero. The lasso regression is an alternative that overcomes this drawback.
Lasso stands for Least Absolute Shrinkage and Selection Operator. It shrinks the regression coefficients toward zero by penalizing the regression model with a penalty term called L1-norm, which is the sum of the absolute coefficients.
In the case of lasso regression, the penalty has the effect of forcing some of the coefficient estimates, with a minor contribution to the model, to be exactly equal to zero. This means that lasso can also be seen as an alternative to the subset selection methods for performing variable selection in order to reduce the complexity of the model.
As in ridge regression, selecting a good value of lambda for the lasso is critical.
One obvious advantage of lasso regression over ridge regression is that it produces simpler and more interpretable models that incorporate only a reduced set of the predictors. However, neither ridge regression nor the lasso will universally dominate the other.
Generally, lasso might perform better in situations where some of the predictors have large coefficients and the remaining predictors have very small coefficients.
Ridge regression will perform better when the outcome is a function of many predictors, all with coefficients of roughly equal size.
Cross-validation methods can be used for identifying which of these two techniques is better on a particular data set.
Elastic net produces a regression model that is penalized with both the L1-norm and the L2-norm. The consequence is to effectively shrink coefficients (as in ridge regression) and to set some coefficients to zero (as in lasso).
library(tidyverse)
library(caret)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loaded glmnet 2.0-18
Preparing the data
We’ll use the Boston data set [in the MASS package] for predicting the median house value (medv) in Boston suburbs, based on multiple predictor variables.
# Load the data
data("Boston", package = "MASS")
# Split the data into training and test set
set.seed(123)
boston.samples <- Boston$medv %>%
createDataPartition(p=0.8, list = FALSE)
train.boston.data <- Boston[boston.samples, ]
test.boston.data <- Boston[-boston.samples, ]
You need to create two objects: x, a matrix of predictor variables, and y, the outcome variable.
# Predictor variables
x <- model.matrix(medv ~ ., train.boston.data)[, -1]
# Outcome variable
y <- train.boston.data$medv
We’ll use the R function glmnet() for computing penalized linear regression models.
glmnet(x,y, alpha = 1, lambda = NULL)
##
## Call: glmnet(x = x, y = y, alpha = 1, lambda = NULL)
##
## Df %Dev Lambda
## [1,] 0 0.00000 6.908000
## [2,] 1 0.09343 6.294000
## [3,] 2 0.18670 5.735000
## [4,] 2 0.26620 5.225000
## [5,] 2 0.33220 4.761000
## [6,] 2 0.38700 4.338000
## [7,] 2 0.43240 3.953000
## [8,] 2 0.47020 3.602000
## [9,] 2 0.50150 3.282000
## [10,] 3 0.53390 2.990000
## [11,] 3 0.56100 2.725000
## [12,] 3 0.58350 2.482000
## [13,] 3 0.60210 2.262000
## [14,] 3 0.61770 2.061000
## [15,] 3 0.63050 1.878000
## [16,] 3 0.64120 1.711000
## [17,] 3 0.65010 1.559000
## [18,] 3 0.65740 1.421000
## [19,] 3 0.66360 1.294000
## [20,] 4 0.66990 1.179000
## [21,] 4 0.67560 1.075000
## [22,] 4 0.68030 0.979100
## [23,] 4 0.68420 0.892200
## [24,] 4 0.68750 0.812900
## [25,] 5 0.69080 0.740700
## [26,] 5 0.69380 0.674900
## [27,] 6 0.69730 0.614900
## [28,] 7 0.70200 0.560300
## [29,] 7 0.70610 0.510500
## [30,] 8 0.70960 0.465200
## [31,] 8 0.71430 0.423800
## [32,] 8 0.71820 0.386200
## [33,] 8 0.72140 0.351900
## [34,] 10 0.72520 0.320600
## [35,] 11 0.72850 0.292100
## [36,] 11 0.73120 0.266200
## [37,] 11 0.73350 0.242500
## [38,] 11 0.73540 0.221000
## [39,] 11 0.73690 0.201400
## [40,] 11 0.73820 0.183500
## [41,] 12 0.73950 0.167200
## [42,] 12 0.74220 0.152300
## [43,] 12 0.74440 0.138800
## [44,] 12 0.74620 0.126500
## [45,] 12 0.74760 0.115200
## [46,] 12 0.74880 0.105000
## [47,] 12 0.74990 0.095660
## [48,] 12 0.75070 0.087160
## [49,] 12 0.75140 0.079420
## [50,] 12 0.75200 0.072370
## [51,] 12 0.75250 0.065940
## [52,] 12 0.75290 0.060080
## [53,] 12 0.75320 0.054740
## [54,] 12 0.75350 0.049880
## [55,] 12 0.75370 0.045450
## [56,] 12 0.75390 0.041410
## [57,] 12 0.75410 0.037730
## [58,] 12 0.75420 0.034380
## [59,] 12 0.75430 0.031330
## [60,] 12 0.75440 0.028540
## [61,] 12 0.75450 0.026010
## [62,] 12 0.75460 0.023700
## [63,] 12 0.75460 0.021590
## [64,] 12 0.75460 0.019670
## [65,] 12 0.75470 0.017930
## [66,] 12 0.75470 0.016330
## [67,] 12 0.75470 0.014880
## [68,] 12 0.75480 0.013560
## [69,] 12 0.75480 0.012360
## [70,] 12 0.75480 0.011260
## [71,] 12 0.75480 0.010260
## [72,] 12 0.75480 0.009346
## [73,] 12 0.75480 0.008516
## [74,] 12 0.75480 0.007760
In penalized regression, you need to specify a constant lambda to adjust the amount of coefficient shrinkage. The best lambda for your data can be defined as the lambda that minimizes the cross-validation prediction error rate. This can be determined automatically using the function cv.glmnet().
In the following sections, we start by computing ridge, lasso and elastic net regression models. Next, we’ll compare the different models in order to choose the best one for our data. The best model is defined as the model that has the lowest prediction error (RMSE).
Computing ridge regression
# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x,y, alpha = 0)
# Display the best lambda value
cv$lambda.min
## [1] 0.6907672
# Fit the final model on the training data
model.ridge <- glmnet(x,y, alpha = 0, lambda = cv$lambda.min)
# Display regression coefficients
coef(model.ridge)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 29.172942853
## crim -0.073961766
## zn 0.035006472
## indus -0.055702961
## chas 2.485565613
## nox -11.449222575
## rm 3.976849313
## age -0.002944308
## dis -1.221350102
## rad 0.147920742
## tax -0.006358355
## ptratio -0.869860292
## black 0.009399874
## lstat -0.483051940
# Make predictions on the test data
x.test <- model.matrix(medv~., test.boston.data)[,-1]
predictions.ridge <- model.ridge %>% predict(x.test) %>% as.vector()
# Model performance metrics
data.frame(
RMSE = RMSE(predictions.ridge, test.boston.data$medv),
Rsquare = R2(predictions.ridge, test.boston.data$medv)
)
Note that by default, the function glmnet() standardizes variables so that their scales are comparable. However, the coefficients are always returned on the original scale.
Computing lasso regression
The only difference from the R code used for ridge regression is that, for lasso regression, you need to specify the argument alpha = 1 instead of alpha = 0.
# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x,y,alpha = 1)
# Display the best lambda value
cv$lambda.min
## [1] 0.007759554
# Fit the final model on the training data
model <- glmnet(x,y, alpha = 1, lambda = cv$lambda.min)
#Display regression coefficients
coef(model)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 36.962374270
## crim -0.092549948
## zn 0.048543171
## indus -0.008321076
## chas 2.287418592
## nox -16.832483690
## rm 3.810180182
## age .
## dis -1.598716155
## rad 0.286797839
## tax -0.012456750
## ptratio -0.950997301
## black 0.009652895
## lstat -0.528739166
# Make predictions on the test data
x.lasso.test <- model.matrix(medv ~ ., test.boston.data)[, -1]
predictions.lasso <- model %>% predict(x.lasso.test) %>% as.vector()
# Model performance metrics
data.frame(
RMSE = RMSE(predictions.lasso, test.boston.data$medv),
Rsquare = R2(predictions.lasso, test.boston.data$medv)
)
The elastic net regression can be easily computed using the caret workflow, which invokes the glmnet package.
We use caret to automatically select the best tuning parameters alpha and lambda. The caret package tests a range of possible alpha and lambda values, then selects the best values for lambda and alpha, resulting in a final model that is an elastic net model.
Here, we’ll test the combination of 10 different values for alpha and lambda. This is specified using the option tuneLength.
The best alpha and lambda values are those that minimize the cross-validation error.
# Build the model using the training set
set.seed(123)
model.elastic <- train(
  medv ~ ., data = train.boston.data, method = "glmnet",
  trControl = trainControl("cv", number = 5),
  tuneLength = 10
)
# Best tuning parameters
model.elastic$bestTune
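To complete the comparison, predictions for the elastic net model can be computed the same way as for ridge and lasso; a sketch:
# Make predictions on the test data and compute performance metrics
predictions.elastic <- model.elastic %>% predict(test.boston.data)
data.frame(
  RMSE = RMSE(predictions.elastic, test.boston.data$medv),
  Rsquare = R2(predictions.elastic, test.boston.data$medv)
)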
Principal component regression (PCR) first applies principal component analysis to the data set to summarize the original predictor variables into a few new variables, also known as principal components (PCs), which are linear combinations of the original data.
These PCs are then used to build the linear regression model. The number of principal components to incorporate in the model is chosen by cross-validation (CV). Note that PCR is suitable when the data set contains highly correlated predictors.
A possible drawback of PCR is that we have no guarantee that the selected principal components are associated with the outcome: the selection of the principal components to incorporate in the model is not supervised by the outcome variable.
An alternative to PCR is partial least squares (PLS) regression, which identifies new principal components that not only summarize the original predictors but are also related to the outcome. These components are then used to fit the regression model. So, compared to PCR, PLS uses a dimension reduction strategy that is supervised by the outcome.
Like PCR, PLS is convenient for data with highly-correlated predictors. The number of PCs used in PLS is generally chosen by cross-validation. Predictors and the outcome variables should be generally standardized, to make the variables comparable.
library(tidyverse)
library(caret)
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
##
## R2
## The following object is masked from 'package:stats':
##
## loadings
We’ll use the Boston data set.
# Build the model on training set
set.seed(123)
model.pc <- train(
medv~., data = train.boston.data, method = "pcr",
scale = TRUE,
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Plot model RMSE vs different values of components
plot(model.pc)
# Print the best tuning parameter ncomp that
# minimizes the cross-validation error, RMSE
model.pc$bestTune
summary(model.pc$finalModel)
## Data: X dimension: 407 13
## Y dimension: 407 1
## Fit method: svdpc
## Number of components considered: 5
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps
## X 47.48 58.40 68.00 74.75 80.94
## .outcome 38.10 51.02 64.43 65.24 71.17
# Make predictions
predictions.pc <- model.pc %>% predict(test.boston.data)
# Model performance metrics
data.frame(
RMSE = caret::RMSE(predictions.pc, test.boston.data$medv),
Rsquare = caret::R2(predictions.pc, test.boston.data$medv)
)
The plot shows the prediction error made by the model according to the number of principal components incorporated in the model.
Our analysis shows that, choosing five principal components (ncomp = 5) gives the smallest prediction error RMSE.
The summary() function also provides the percentage of variance explained in the predictors(x) and in the outcome (medv) using different numbers of components.
For example, 80.94% of the variation (or information) contained in the predictors is captured by 5 components (ncomp = 5). Additionally, setting ncomp = 5 captures 71% of the information in the outcome variable (medv), which is good.
Taken together, cross-validation identifies ncomp = 5 as the optimal number of PCs that minimizes the prediction error (RMSE) while explaining enough variation in the predictors and in the outcome.
# Build the model on training set
set.seed(123)
model.pls <- train(
medv~., data = train.boston.data, method = "pls",
scale = TRUE,
trControl = trainControl("cv", number = 10),
tuneLength = 10
)
# Plot model RMSE vs Different values of components
plot(model.pls)
model.pls$bestTune
summary(model.pls$finalModel)
## Data: X dimension: 407 13
## Y dimension: 407 1
## Fit method: oscorespls
## Number of components considered: 9
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## X 46.19 57.32 64.15 69.76 75.63 78.66 82.85
## .outcome 50.90 71.84 73.71 74.71 75.18 75.35 75.42
## 8 comps 9 comps
## X 85.92 90.36
## .outcome 75.48 75.49
#Make predictions
predictions.pls <- model.pls %>% predict(test.boston.data)
# Model performance metrics
data.frame(
RMSE = caret::RMSE(predictions.pls, test.boston.data$medv),
Rsquare = caret::R2(predictions.pls, test.boston.data$medv)
)
The optimal number of principal components included in the PLS model is 9. This captures 90% of the variation in the predictors and 75% of the variation in the outcome variable (medv).
In our example, the cross-validation error RMSE obtained with the PLS model is lower than the RMSE obtained using the PCR method. So, the PLS model is the best model for explaining our data, compared to the PCR model.
In this part, we’ll cover classification with logistic regression: simple, multiple, stepwise and penalized logistic regression.
Most classification algorithms compute the probability of belonging to a given class.
Observations are then assigned to the class that has the highest probability score.
Generally, you need to decide on a probability cutoff above which you consider an observation as belonging to a given class; a toy sketch follows.
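# A toy sketch of the cutoff idea (these probabilities are made up):
p.demo <- c(0.12, 0.65, 0.48, 0.91)
ifelse(p.demo > 0.5, "pos", "neg")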
The data set we’ll be using
The Pima Indian Diabetes data set is available in the mlbench package. It will be used for binary classification.
# Load the data set
data("PimaIndiansDiabetes2", package = "mlbench")
# Inspect the data
head(PimaIndiansDiabetes2,4)
str(PimaIndiansDiabetes2)
## 'data.frame': 768 obs. of 9 variables:
## $ pregnant: num 6 1 8 1 0 5 3 10 2 8 ...
## $ glucose : num 148 85 183 89 137 116 78 115 197 125 ...
## $ pressure: num 72 66 64 66 40 74 50 NA 70 96 ...
## $ triceps : num 35 29 NA 23 35 NA 32 NA 45 NA ...
## $ insulin : num NA NA NA 94 168 NA 88 NA 543 NA ...
## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 NA ...
## $ pedigree: num 0.627 0.351 0.672 0.167 2.288 ...
## $ age : num 50 31 32 21 33 30 26 29 53 54 ...
## $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...
The data contains 768 individuals (female) and 9 clinical variables for predicting the probability of an individual being diabetes-positive or negative:
| Column Names | Description |
|---|---|
| pregnant | Number of times pregnant |
| glucose | Plasma glucose concentration (glucose tolerance test) |
| pressure | Diastolic blood pressure (mm Hg) |
| triceps | Triceps skin fold thickness (mm) |
| insulin | 2-Hour serum insulin (mu U/ml) |
| mass | Body mass index (weight in kg/(height in m)^2) |
| pedigree | Diabetes pedigree function |
| age | Age (years) |
| diabetes | Class variable (test for diabetes) |
Performing the following steps might improve the accuracy of your model
set.seed(123)
training.Pima.samples <- PimaIndiansDiabetes2$diabetes %>%
createDataPartition(p = 0.8, list = FALSE)
train.pima.data <- PimaIndiansDiabetes2[training.Pima.samples, ]
test.pima.data <- PimaIndiansDiabetes2[-training.Pima.samples, ]
# Fit the model
model.lr.pima <- glm(diabetes~., data = train.pima.data, family = binomial)
# Summarize the model
summary(model.lr.pima)
##
## Call:
## glm(formula = diabetes ~ ., family = binomial, data = train.pima.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6751 -0.6666 -0.3588 0.6581 2.5650
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.196902 1.369247 -7.447 9.54e-14 ***
## pregnant 0.082032 0.061219 1.340 0.18025
## glucose 0.036517 0.006381 5.723 1.05e-08 ***
## pressure 0.001333 0.013053 0.102 0.91863
## triceps 0.008425 0.018649 0.452 0.65145
## insulin -0.001219 0.001441 -0.845 0.39784
## mass 0.081434 0.030448 2.675 0.00748 **
## pedigree 1.186528 0.470790 2.520 0.01173 *
## age 0.030886 0.020337 1.519 0.12884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 402.38 on 314 degrees of freedom
## Residual deviance: 280.83 on 306 degrees of freedom
## (300 observations deleted due to missingness)
## AIC: 298.83
##
## Number of Fisher Scoring iterations: 5
# Make predictions
probabilities <- model.lr.pima %>% predict(test.pima.data, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg")
# predicted.classes
# Model accuracy
mean(predicted.classes == test.pima.data$diabetes)
## [1] NA
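The accuracy is NA here because the test set still contains rows with missing predictor values: predict() returns NA for those rows, and the NAs propagate into the mean. Dropping them gives a usable estimate (a quick sketch):
# Ignore comparisons involving NA predictions
mean(predicted.classes == test.pima.data$diabetes, na.rm = TRUE)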
The simple logistic regression is used to predict the probability of class membership based on one single predictor variable.
The following R code builds a model to predict the probability of being diabetes-positive based on the plasma glucose concentration:
model.logit.simple <- glm(diabetes ~ glucose, data = train.pima.data, family = binomial)
summary(model.logit.simple)
##
## Call:
## glm(formula = diabetes ~ glucose, family = binomial, data = train.pima.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1322 -0.7882 -0.5157 0.8641 2.2706
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.555585 0.482107 -11.52 <2e-16 ***
## glucose 0.039188 0.003697 10.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 790.12 on 610 degrees of freedom
## Residual deviance: 637.56 on 609 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 641.56
##
## Number of Fisher Scoring iterations: 4
The output above shows the estimates of the regression beta coefficients and their significance levels. The intercept (b0) is -5.55 and the coefficient of the glucose variable is 0.039.
The logistic equation can be written as p = exp(-5.55 + 0.039*glucose) / (1 + exp(-5.55 + 0.039*glucose)). Using this formula, for each new plasma glucose concentration value, you can predict the probability of the individual being diabetes-positive.
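To check the formula by hand for glucose = 190, using the fitted coefficients above (plogis() is R’s built-in inverse logit); the result of roughly 0.87 is consistent with the "pos" prediction below:
b0 <- -5.555585; b1 <- 0.039188
exp(b0 + b1 * 190) / (1 + exp(b0 + b1 * 190))
plogis(b0 + b1 * 190)  # same value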
Predictions can easily be made using the function predict(). Use the option type = "response" to obtain the probabilities directly:
newdata <- data.frame(glucose = c(20,190))
probabilities <- model.logit.simple %>% predict(newdata, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg")
predicted.classes
## 1 2
## "neg" "pos"
train.pima.data %>%
mutate(prob = ifelse(diabetes == "pos", 1, 0)) %>%
ggplot(aes(glucose,prob)) +
geom_point(alpha = 0.2) +
geom_smooth(method = "glm", method.args = list(family = "binomial"))+
labs(
title = "Simple Logistic Regression Model",
x = "Plasma Glucose Concentration",
y = "Probability of being diabetes-pos"
)
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
plot(model.logit.simple)
Stepwise logistic regression consists of automatically selecting a reduced number of predictor variables for building the best performing logistic regression model.
Data set: PimaIndiansDiabetes2
Quick start R code
library(MASS)
# Fit the model
removed.missing.data <- na.omit(train.pima.data)
model <- glm(diabetes ~ ., data = removed.missing.data , family = binomial) %>% stepAIC(trace = FALSE)
summary(model)
##
## Call:
## glm(formula = diabetes ~ glucose + mass + pedigree + age, family = binomial,
## data = removed.missing.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8024 -0.6631 -0.3716 0.6617 2.5631
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.081719 1.202909 -8.381 < 2e-16 ***
## glucose 0.033770 0.005536 6.101 1.06e-09 ***
## mass 0.083808 0.022726 3.688 0.000226 ***
## pedigree 1.110791 0.456960 2.431 0.015064 *
## age 0.051063 0.014602 3.497 0.000471 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 402.38 on 314 degrees of freedom
## Residual deviance: 283.63 on 310 degrees of freedom
## AIC: 293.63
##
## Number of Fisher Scoring iterations: 5
removed.missing.data.from.test <- na.omit(test.pima.data)
probabilities.step <- model %>% predict(removed.missing.data.from.test, type = "response")
predicted.classes.step.logit <- ifelse(probabilities.step > 0.5, "pos", "neg")
#Model Accuracy
mean(predicted.classes.step.logit == removed.missing.data.from.test$diabetes)
## [1] 0.7922078
df <- data.frame("Predicted" = predicted.classes.step.logit)
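# Caution: test.pima.data still contains rows with missing values, so this
# vector is longer than the predictions made on the complete cases; the
# comparison below recycles the shorter vector and triggers the warnings
# shown. Comparing against removed.missing.data.from.test$diabetes avoids this.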
(df$Predicted == test.pima.data$diabetes)
## Warning in `==.default`(df$Predicted, test.pima.data$diabetes): longer
## object length is not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
## [1] TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE
## [12] FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
## [23] TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [34] FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE FALSE
## [45] FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE FALSE
## [56] TRUE FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE
## [67] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
## [78] TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE
## [89] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
## [100] TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE
## [111] TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE TRUE TRUE
## [122] FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE FALSE TRUE
## [133] FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [144] FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
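Beyond overall accuracy, the caret function confusionMatrix() reports sensitivity, specificity, and related statistics. A minimal sketch using the stepwise predictions (the predicted labels must first be converted to a factor with the same levels as the reference):
# Detailed classification metrics for the stepwise model
confusionMatrix(
  factor(predicted.classes.step.logit, levels = levels(removed.missing.data.from.test$diabetes)),
  removed.missing.data.from.test$diabetes
)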
Penalized logistic regression imposes a penalty on the logistic model for having too many variables. This results in shrinking the coefficients of the less contributive variables toward zero. This is also known as regularization. The most commonly used penalized regression methods include:
Ridge regression: variables with minor contribution have their coefficients close to zero. However, all the variables are incorporated in the model. This is useful when all variables need to be incorporated in the model according to domain knowledge.
Lasso regression: the coefficients of some less contributive variables are forced to be exactly zero. Only the most significant variables are kept in the final model.
Elastic net regression: a combination of ridge and lasso regression. It shrinks some coefficients toward zero (like ridge regression) and sets some coefficients to exactly zero (like lasso regression).
Required packages
library(tidyverse)
library(caret)
library(glmnet)
Preparing the data
Data set: PimaIndiansDiabetes2
# Load the data and remove NAs
Pima.Indians.data <- na.omit(PimaIndiansDiabetes2)
# Inspect the data
sample_n(Pima.Indians.data, 3)
# Split the data into training and test set
set.seed(123)
training.samples <- Pima.Indians.data$diabetes %>%
createDataPartition(p = 0.8, list = FALSE)
train.pima.indians <- Pima.Indians.data[training.samples, ]
test.pima.indians <- Pima.Indians.data[-training.samples, ]
Additional data preparation
The R function model.matrix() helps create the matrix of predictors and also automatically converts categorical predictors to the appropriate dummy variables, which is required by the glmnet() function.
# Dummy code categorical predictor variables
x <- model.matrix(diabetes ~., train.pima.indians)[,-1]
# Convert the outcome (class) to a numerical variable
y <- ifelse(train.pima.indians$diabetes == "pos", 1, 0)
We’ll use the R function glmnet() for computing penalized logistic regression.
The simplified format is as follows:
library(glmnet)
glmnet(x, y, family = "binomial", alpha = 1, lambda = NULL)
##
## Call: glmnet(x = x, y = y, family = "binomial", alpha = 1, lambda = NULL)
##
## Df %Dev Lambda
## [1,] 0 3.147e-15 0.2480000
## [2,] 1 3.690e-02 0.2259000
## [3,] 1 6.743e-02 0.2059000
## [4,] 1 9.292e-02 0.1876000
## [5,] 1 1.144e-01 0.1709000
## [6,] 1 1.325e-01 0.1557000
## [7,] 1 1.479e-01 0.1419000
## [8,] 1 1.610e-01 0.1293000
## [9,] 1 1.722e-01 0.1178000
## [10,] 2 1.851e-01 0.1073000
## [11,] 2 1.970e-01 0.0978000
## [12,] 2 2.071e-01 0.0891100
## [13,] 2 2.158e-01 0.0812000
## [14,] 2 2.233e-01 0.0739800
## [15,] 3 2.314e-01 0.0674100
## [16,] 5 2.400e-01 0.0614200
## [17,] 5 2.484e-01 0.0559600
## [18,] 5 2.558e-01 0.0509900
## [19,] 5 2.620e-01 0.0464600
## [20,] 5 2.674e-01 0.0423400
## [21,] 5 2.721e-01 0.0385700
## [22,] 6 2.762e-01 0.0351500
## [23,] 6 2.798e-01 0.0320300
## [24,] 6 2.829e-01 0.0291800
## [25,] 6 2.856e-01 0.0265900
## [26,] 6 2.879e-01 0.0242300
## [27,] 6 2.898e-01 0.0220700
## [28,] 6 2.915e-01 0.0201100
## [29,] 6 2.929e-01 0.0183300
## [30,] 6 2.941e-01 0.0167000
## [31,] 6 2.951e-01 0.0152100
## [32,] 6 2.959e-01 0.0138600
## [33,] 6 2.967e-01 0.0126300
## [34,] 7 2.975e-01 0.0115100
## [35,] 7 2.984e-01 0.0104900
## [36,] 7 2.993e-01 0.0095550
## [37,] 7 2.999e-01 0.0087060
## [38,] 7 3.005e-01 0.0079330
## [39,] 7 3.010e-01 0.0072280
## [40,] 7 3.014e-01 0.0065860
## [41,] 7 3.018e-01 0.0060010
## [42,] 8 3.022e-01 0.0054680
## [43,] 8 3.025e-01 0.0049820
## [44,] 8 3.028e-01 0.0045390
## [45,] 8 3.030e-01 0.0041360
## [46,] 8 3.032e-01 0.0037690
## [47,] 8 3.034e-01 0.0034340
## [48,] 8 3.035e-01 0.0031290
## [49,] 8 3.037e-01 0.0028510
## [50,] 8 3.038e-01 0.0025980
## [51,] 8 3.038e-01 0.0023670
## [52,] 8 3.039e-01 0.0021570
## [53,] 8 3.040e-01 0.0019650
## [54,] 8 3.040e-01 0.0017900
## [55,] 8 3.040e-01 0.0016310
## [56,] 8 3.041e-01 0.0014860
## [57,] 8 3.041e-01 0.0013540
## [58,] 8 3.041e-01 0.0012340
## [59,] 8 3.042e-01 0.0011240
## [60,] 8 3.042e-01 0.0010250
## [61,] 8 3.042e-01 0.0009336
## [62,] 8 3.042e-01 0.0008506
## [63,] 8 3.042e-01 0.0007750
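Note that ridge and elastic net fits are obtained from the same call by changing only the alpha argument; a quick sketch reusing the x and y objects built above:
# alpha = 0 gives ridge, alpha = 1 gives lasso, intermediate values give elastic net
fit.ridge <- glmnet(x, y, family = "binomial", alpha = 0)
fit.enet  <- glmnet(x, y, family = "binomial", alpha = 0.5)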
set.seed(123)
cv.lasso <- cv.glmnet(x,y, alpha = 1, family = "binomial")
# cv.lasso
# summary(cv.lasso)
# Fit the final model on the training data
model.logit.lasso <- glmnet(x,y, alpha = 1, family = "binomial", lambda = cv.lasso$lambda.min)
# Display regression coefficients
coef(model.logit.lasso)
## 9 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -8.6158632778
## pregnant 0.0350263984
## glucose 0.0369158677
## pressure .
## triceps 0.0164757684
## insulin -0.0003928031
## mass 0.0304940618
## pedigree 0.7854910473
## age 0.0362782651
# Make predictions on the test data
x.test <- model.matrix(diabetes~., test.pima.indians)[,-1]
# type = "response" returns probabilities rather than log-odds
probabilities.logit.lasso <- model.logit.lasso %>% predict(newx = x.test, type = "response")
predict.classess.logit.lasso <- ifelse(probabilities.logit.lasso > 0.5, "pos", "neg")
#Model accuracy
observed.classes <- test.pima.indians$diabetes
mean(predict.classess.logit.lasso == observed.classes)
## [1] 0.7692308
plot(cv.lasso)
The plot displays the cross-validation error according to the log of lambda. The left dashed vertical line indicates that the log of the optimal value of lambda is approximately -5; this is the value that minimizes the prediction error and will give the most accurate model. The exact value of lambda can be viewed as follows:
cv.lasso$lambda.min
## [1] 0.008706319
cv.lasso$lambda.1se
## [1] 0.06740987
Using lambda.min as the best lambda gives the following regression coefficients:
coef(cv.lasso, cv.lasso$lambda.min)
## 9 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -8.6156146833
## pregnant 0.0350758913
## glucose 0.0369156360
## pressure .
## triceps 0.0164842605
## insulin -0.0003924262
## mass 0.0304848446
## pedigree 0.7855063693
## age 0.0362650459
coef(cv.lasso, cv.lasso$lambda.1se)
## 9 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -4.657500725
## pregnant .
## glucose 0.026284471
## pressure .
## triceps 0.001905497
## insulin .
## mass .
## pedigree .
## age 0.017338402
The logistic regression method assumes that:
The outcome is a binary or dichotomous variable, like yes vs no, positive vs negative, 1 vs 0.
There is a linear relationship between the logit of the outcome and each predictor variable. Recall that the logit function is logit(p) = log(p/(1-p)), where p is the probability of the outcome.
There are no influential values (extreme values or outliers) in the continuous predictors.
There is no high intercorrelation (i.e. multicollinearity) among the predictors.
Loading required R packages
library(tidyverse)
library(broom)
theme_set(theme_classic())
PimaIndiansDiabetes2.cleaned <- na.omit(PimaIndiansDiabetes2)
# Fit the logistic regression model
model.pima.logit2 <- glm(diabetes~., data = PimaIndiansDiabetes2.cleaned, family = binomial)
# Predict the probability (p) of diabetes positivity
probabilities.pima.logit <- predict(model.pima.logit2, type = "response")
predicted.pima.classes <- ifelse(probabilities.pima.logit > 0.5, "pos", "neg")
head(predicted.pima.classes)
## 4 5 7 9 14 15
## "neg" "pos" "neg" "pos" "pos" "pos"
# train.pima.indians
mydata <- PimaIndiansDiabetes2.cleaned %>%
dplyr::select_if(is.numeric)
predictors <- colnames(mydata)
# Bind the logit and tidying the data for plot
model.pima.logit <- mydata %>%
mutate(logit = log(probabilities.pima.logit/(1-probabilities.pima.logit))) %>%
gather(key = "predictors", value = "predictor.value", -logit)
ggplot(model.pima.logit, aes(logit, predictor.value)) +
geom_point(size = 0.5, alpha = 0.5) +
geom_smooth(method = "loess") +
theme_bw() +
facet_wrap(~predictors, scales = "free_y")
Influential values are extreme individual data points that can alter the quality of the logistic regression model. The most extreme values in the data can be examined by visualizing the Cook’s distance values.
Here we label the top 3 largest values:
plot(model.pima.logit2, which = 4, id.n = 3)
Note that not all outliers are influential observations. To check whether the data contain potential influential observations, the standardized residual error can be inspected. Data points with an absolute standardized residual above 3 represent possible outliers and may deserve closer attention.
The following R code computes the standardized residuals (.std.resid) and the Cook’s distance (.cooksd) using the R function augment()[broom package].
# Extract model results
model.pima.logit.data <- augment(model.pima.logit2) %>%
mutate(index = 1:n())
The data for the top 3 largest values, according to the Cook’s distance, can be displayed as follows:
model.pima.logit.data %>% top_n(3, .cooksd)
Plot the standardized residuals:
ggplot(model.pima.logit.data, aes(index, .std.resid)) +
geom_point(aes(color = diabetes), alpha = 0.5) +
theme_bw()
Filter potential influential data points with abs(.std.resid) > 3:
model.pima.logit.data %>% filter(abs(.std.resid) > 3)
There is no influential observation in our data.
When we have outliers in a continuous predictor, potential solutions include removing the concerned records, transforming the variable (for example, to a log scale), or using non-parametric methods.
Multicollinearity corresponds to a situation where the data contain highly correlated predictor variables.
Multicollinearity is an important issue in regression analysis and should be fixed by removing the concerned variables. It can be assessed using the R function vif() [car package]:
car::vif(model.pima.logit2)
## pregnant glucose pressure triceps insulin mass pedigree age
## 1.892387 1.378937 1.191287 1.638865 1.384038 1.832416 1.031715 1.974053
As a rule of thumb, a VIF value that exceeds 5 or 10 indicates a problematic amount of collinearity. In our example there is no collinearity: all variables have a value of VIF well below 5.
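If a predictor did exceed the threshold, a simple remedy would be to drop it and refit. A hypothetical sketch (no variable actually needs removing here, and triceps is chosen arbitrarily for illustration):
# Hypothetical: refit without one predictor and re-check VIF
model.reduced <- glm(diabetes ~ . - triceps, data = PimaIndiansDiabetes2.cleaned, family = binomial)
car::vif(model.reduced)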
Multinomial logistic regression is an extension of logistic regression for multiclass classification tasks. It is used when the outcome involves more than two classes.
Loading required R packages
library(tidyverse)
library(caret)
library(nnet)
##
## Attaching package: 'nnet'
## The following object is masked from 'package:mgcv':
##
## multinom
We’ll use the iris data set
# Load the data
data("iris")
# Inspect the data
sample_n(iris,3)
# Split the data into training and test set
set.seed(123)
training.samples.iris <- iris$Species %>% createDataPartition(p=0.8, list = FALSE)
train.iris.data <- iris[training.samples.iris,]
test.iris.data <- iris[-training.samples.iris,]
# Fit the model
model <- nnet::multinom(Species ~., data = train.iris.data)
## # weights: 18 (10 variable)
## initial value 131.833475
## iter 10 value 13.707358
## iter 20 value 5.548255
## iter 30 value 5.196272
## iter 40 value 4.985881
## iter 50 value 4.978698
## iter 60 value 4.970034
## iter 70 value 4.968625
## iter 80 value 4.967145
## iter 90 value 4.967125
## iter 100 value 4.967097
## final value 4.967097
## stopped after 100 iterations
# Summarize the model
summary(model)
## Call:
## nnet::multinom(formula = Species ~ ., data = train.iris.data)
##
## Coefficients:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 17.29267 -5.643165 -8.391525 14.61331 -2.091772
## virginica -21.99694 -6.650591 -14.538985 22.01169 14.158731
##
## Std. Errors:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 43.11205 119.7240 198.5332 91.66177 59.26249
## virginica 43.68199 119.7252 198.5908 91.81816 59.59031
##
## Residual Deviance: 9.934194
## AIC: 29.93419
# Make predictions
predicted.species <- model %>% predict(test.iris.data)
head(predicted.species)
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
# Model accuracy
mean(predicted.species == test.iris.data$Species)
## [1] 0.9666667
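A cross-tabulation of predicted against observed species shows where the few misclassifications occur:
# Confusion table for the multinomial model
table(Predicted = predicted.species, Observed = test.iris.data$Species)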
The following discriminant analysis methods will be described:
Linear discriminant analysis (LDA): Uses linear combinations of predictors to predict the class of a given observation. Assumes that the predictor variables (p) are normally distributed and the classes have identical variances (for univariate analysis, p = 1) or identical covariance matrices (for multivariate analysis, p > 1).
Quadratic discriminant analysis (QDA): More flexible than LDA. Here, there is no assumption that the covariance matrices of the classes are the same.
Mixture discriminant analysis (MDA): Each class is assumed to be a Gaussian mixture of subclasses.
Flexible discriminant analysis (FDA): Non-linear combinations of predictors, such as splines, are used.
Regularized discriminant analysis (RDA): Regularization (or shrinkage) improves the estimate of the covariance matrices in situations where the number of predictors is larger than the number of samples in the training data, leading to an improvement of the discriminant analysis.
library(tidyverse)
library(caret)
theme_set(theme_classic())
# Load the data
data("iris")
# Split the data into training and test set
set.seed(123)
training.samples.iris <- iris$Species %>%
createDataPartition(p=0.8, list = FALSE)
train.data.iris <- iris[training.samples.iris, ]
test.data.iris <- iris[-training.samples.iris, ]
# Estimate preprocessing parameters
preproc.param <- train.data.iris %>%
  preProcess(method = c("center", "scale"))
# Transform the data using the estimated parameters
train.transformed <- preproc.param %>% predict(train.data.iris)
test.transformed <- preproc.param %>% predict(test.data.iris)
Before performing LDA, consider:
* Inspecting the univariate distribution of each variable to make sure that it is normally distributed. If not, you can transform the variable, using log and root transforms for exponential distributions and Box-Cox for skewed distributions (see the sketch below).
* Removing outliers from your data and standardizing the variables to make their scales comparable.
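As a sketch, caret’s preProcess() can also estimate Box-Cox transformations for skewed, strictly positive predictors alongside centering and scaling:
# Estimate Box-Cox transforms together with centering/scaling
preproc.bc <- preProcess(train.data.iris, method = c("BoxCox", "center", "scale"))
train.bc <- predict(preproc.bc, train.data.iris)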
library(MASS)
# Fit the model
model.lda <- lda(Species ~., data = train.transformed)
# Make predictions
predictions <- model.lda %>% predict(test.transformed)
# Model accuracy
mean(predictions$class == test.transformed$Species)
## [1] 1
model.lda
## Call:
## lda(Species ~ ., data = train.transformed)
##
## Prior probabilities of groups:
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa -1.0120728 0.7867793 -1.2927218 -1.2496079
## versicolor 0.1174121 -0.6478157 0.2724253 0.1541511
## virginica 0.8946607 -0.1389636 1.0202965 1.0954568
##
## Coefficients of linear discriminants:
## LD1 LD2
## Sepal.Length 0.9108023 0.03183011
## Sepal.Width 0.6477657 0.89852536
## Petal.Length -4.0816032 -2.22724052
## Petal.Width -2.3128276 2.65441936
##
## Proportion of trace:
## LD1 LD2
## 0.9905 0.0095
summary(model.lda)
## Length Class Mode
## prior 3 -none- numeric
## counts 3 -none- numeric
## means 12 -none- numeric
## scaling 8 -none- numeric
## lev 3 -none- character
## svd 2 -none- numeric
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
LDA determines group means and computes, for each individual, the probability of belonging to the different groups. The individual is then assigned to the group with the highest probability score.
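These probabilities are directly available in the prediction object computed earlier, which carries the predicted class, the posterior probabilities, and the discriminant scores:
# Components of the object returned by predict() for an lda model
names(predictions)          # "class", "posterior", "x"
head(predictions$posterior) # per-group membership probabilities
head(predictions$x)         # linear discriminant scores (LD1, LD2)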
The lda() output contains the following elements:
* Prior probabilities of groups: the proportion of training observations in each group. For example, 33% of the training observations are in the setosa group.
* Group means: the group centers of gravity, showing the mean of each variable in each group.
* Coefficients of linear discriminants: the linear combinations of the predictor variables that are used to form the LDA decision rule. For example:
LD1 = 0.91*Sepal.Length + 0.65*Sepal.Width - 4.08*Petal.Length - 2.31*Petal.Width
LD2 = 0.03*Sepal.Length + 0.90*Sepal.Width - 2.23*Petal.Length + 2.65*Petal.Width
Using the function plot() produces plots of the linear discriminants, obtained by computing LD1 and LD2 for each of the training observations:
plot(model.lda)
lda.data <- cbind(train.transformed, predict(model.lda)$x)
ggplot(lda.data, aes(LD1, LD2)) +
geom_point(aes(color = Species))
It can be seen that our model correctly classified 100% of the observations, which is excellent.
Note that, by default, the probability cutoff used to decide group membership is 0.5.
In some situations, you might want to increase the precision of the model. In this case, you can fine-tune the model by adjusting the posterior probability cutoff, raising or lowering it as needed (see the sketch below).
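A minimal sketch of a custom cutoff, assuming a hypothetical binary problem with classes "neg" and "pos" (the iris example has three classes, so the default highest-posterior rule applies there):
# Posterior probabilities are available from predict()
post <- predict(model.lda, test.transformed)$posterior
# For a hypothetical two-class model you could then require a higher cutoff:
# predicted <- ifelse(post[, "pos"] > 0.8, "pos", "neg")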
Note that, if the predictor variables are standardized before computing LDA, the discriminant weights can be used as measures of variable importance for feature selection.
QDA is a little bit more flexible than LDA, in the sense that it does not assume equality of variance/covariance. In other words, for QDA the covariance matrix can be different for each class.
LDA tends to do better than QDA when you have a small training set.
In contrast, QDA is recommended if the training set is very large, so that the variance of the classifier is not a major issue, or if the assumption of a common covariance matrix for the K classes is clearly untenable.
QDA can be computed using the R function qda() [MASS package]
library(MASS)
#Fit the model
model.qda <- qda(Species~., data = train.transformed)
model.qda
## Call:
## qda(Species ~ ., data = train.transformed)
##
## Prior probabilities of groups:
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa -1.0120728 0.7867793 -1.2927218 -1.2496079
## versicolor 0.1174121 -0.6478157 0.2724253 0.1541511
## virginica 0.8946607 -0.1389636 1.0202965 1.0954568
# Make predictions
predictions.qda <- model.qda %>% predict(test.transformed)
# Model accuracy
mean(predictions.qda$class == test.transformed$Species)
## [1] 1
The LDA classifier assumes that each class comes from a single normal (or Gaussian) distribution. This can be too restrictive.
For MDA, each class is assumed to be a Gaussian mixture of subclasses, where each data point has a probability of belonging to each subclass. Equality of the covariance matrix among classes is still assumed.
library(mda)
## Loading required package: class
## Loaded mda 0.4-10
model.mda <- mda(Species~., data = train.transformed)
model.mda
## Call:
## mda(formula = Species ~ ., data = train.transformed)
##
## Dimension: 5
##
## Percent Between-Group Variance Explained:
## v1 v2 v3 v4 v5
## 96.77 99.63 99.86 100.00 100.00
##
## Degrees of Freedom (per dimension): 5
##
## Training Misclassification Error: 0.025 ( N = 120 )
##
## Deviance: 14.005
# Make predictions
predicted.classes.mda <- model.mda %>% predict(test.transformed)
# Model accuracy
mean(predicted.classes.mda == test.transformed$Species)
## [1] 1
FDA is a flexible extension of LDA that uses non-linear combinations of predictors, such as splines. FDA is useful for modeling multivariate non-normality or non-linear relationships among variables within each group, allowing for a more accurate classification.
library(mda)
# Fit the model
model.fda <- fda(Species~., data = train.transformed)
model.fda
## Call:
## fda(formula = Species ~ ., data = train.transformed)
##
## Dimension: 2
##
## Percent Between-Group Variance Explained:
## v1 v2
## 99.05 100.00
##
## Degrees of Freedom (per dimension): 5
##
## Training Misclassification Error: 0.025 ( N = 120 )
predicted.classes.fda <- model.fda %>% predict(test.transformed)
#Model accuracy
mean(predicted.classes.fda == test.transformed$Species)
## [1] 1
RDA builds a classification rule by regularizing the group covariance matrices, allowing a more robust model against multicollinearity in the data. This might be very useful for a large multivariate data set containing highly correlated predictors.
Regularized discriminant analysis is a kind of trade-off between LDA and QDA. Recall that LDA assumes equality of the covariance matrix for all of the classes, while QDA assumes a different covariance matrix for each class; RDA is an intermediate between the two.
RDA shrinks the separate covariances of QDA toward a common covariance as in LDA. This improves the estimate of the covariance matrices in situations where the number of predictors is larger than the number of samples in the training data, potentially leading to an improvement of the model accuracy.
library(klaR)
# Fit the model
model.rda <- rda(Species~., data = train.transformed)
# Make predictions
predictions.rda <- model.rda %>% predict(test.transformed)
# Model accuracy
mean(predictions.rda$class == test.transformed$Species)
## [1] 1
The Naive Bayes classifier is a simple and powerful method that can be used for binary and multiclass classification problems.
The Naive Bayes classifier predicts the class membership probability of observations using Bayes’ theorem, which is based on conditional probability, that is, the probability that something happens given that something else has already occurred.
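As an illustration of the underlying rule, here is a toy Bayes’ theorem computation; the 33% prior, the 90% true positive rate, and the 20% false positive rate are all made-up numbers for the example:
# Toy example: P(pos | signal) = P(signal | pos) * P(pos) / P(signal)
p.pos <- 0.33                               # assumed prior probability of the "pos" class
p.signal <- 0.9 * p.pos + 0.2 * (1 - p.pos) # total probability of observing the signal
0.9 * p.pos / p.signal                      # posterior probability of "pos" (~0.69)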
We’ll use the PimaIndiansDiabetes2 data set.
library(klaR)
# Fit the model
model.naive <- NaiveBayes(diabetes ~., data = train.pima.indians)
model.naive
## $apriori
## grouping
## neg pos
## 0.6687898 0.3312102
##
## $tables
## $tables$pregnant
## [,1] [,2]
## neg 2.890476 2.645282
## pos 4.471154 3.968215
##
## $tables$glucose
## [,1] [,2]
## neg 112.1381 25.23393
## pos 147.1635 29.39590
##
## $tables$pressure
## [,1] [,2]
## neg 69.19048 12.13173
## pos 73.54808 13.71264
##
## $tables$triceps
## [,1] [,2]
## neg 27.83333 10.668822
## pos 32.68269 9.539152
##
## $tables$insulin
## [,1] [,2]
## neg 137.0571 110.5755
## pos 214.2115 140.5018
##
## $tables$mass
## [,1] [,2]
## neg 32.11524 6.905892
## pos 35.42500 6.825278
##
## $tables$pedigree
## [,1] [,2]
## neg 0.4699952 0.3091545
## pos 0.6139904 0.3853125
##
## $tables$age
## [,1] [,2]
## neg 28.6619 9.085715
## pos 35.8750 10.248076
##
##
## $levels
## [1] "neg" "pos"
##
## $call
## NaiveBayes.default(x = X, grouping = Y)
##
## $x
## pregnant glucose pressure triceps insulin mass pedigree age
## (the 314 training observations are omitted here for brevity)
##
## $usekernel
## [1] FALSE
##
## $varnames
## [1] "pregnant" "glucose" "pressure" "triceps" "insulin" "mass"
## [7] "pedigree" "age"
##
## attr(,"class")
## [1] "NaiveBayes"
# Make predictions
prediction.naive <- model.naive %>% predict(test.pima.indians)
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## (the same warning is repeated for observations 2 through 78)
# Model accuracy
mean(prediction.naive$class == test.pima.indians$diabetes)
## [1] 0.8205128
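The prediction object returned by klaR also carries the posterior probabilities behind each class call, which can be inspected directly:
# Class-membership probabilities underlying the Naive Bayes predictions
head(prediction.naive$posterior)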
The caret R package can automatically train the model and assess the model accuracy using k-fold cross-validation:
library(klaR)
set.seed(123)
model.naive2 <- train(diabetes~., data = train.pima.indians, method = "nb", trControl = trainControl("cv", number = 10))
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## (the same warning is repeated for the held-out observations in each of the 10 cross-validation folds)
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
# make predictions
predicted.classes.naive2 <- model.naive2 %>% predict(test.pima.indians)
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## (... the same warning repeats for observations 2 through 78; the identical
## repetitions are omitted here ...)
# Model accuracy
mean(predicted.classes.naive2 == test.pima.indians$diabetes)
## [1] 0.8076923
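The long run of warnings above comes from the underlying klaR naive Bayes engine: for some observations, the product of the estimated class-conditional densities underflows to zero for every class. As a hedged sketch (an addition, not part of the original analysis; model.naive3 is a hypothetical name), Laplace smoothing (fL) and kernel density estimation (usekernel) are tuning parameters of caret's "nb" method that often mitigate this:
# Hedged sketch: re-tune the naive Bayes model to reduce zero-probability warnings
# (assumes the train.pima.indians / test.pima.indians split created earlier)
set.seed(123)
model.naive3 <- train(
  diabetes ~ ., data = train.pima.indians, method = "nb",
  trControl = trainControl("cv", number = 10),
  tuneGrid = expand.grid(fL = 1, usekernel = TRUE, adjust = 1)
)
# Accuracy on the held-out test set
mean(predict(model.naive3, test.pima.indians) == test.pima.indians$diabetes)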
Support Vector Machine (SVM) is a machine learning technique used for classification tasks. Briefly, SVM works by identifying the optimal decision boundary that separates data points from different groups (or classes), and then predicting the class of new observations based on this separation boundary.
Depending on the situation, the groups may be separable by a straight line or only by a non-linear boundary.
Support vector machine methods can handle both linear and non-linear class boundaries, and can be used for both two-class and multi-class classification problems.
In real-life data, the separation boundary is generally non-linear. Technically, the SVM algorithm performs non-linear classification using what is called the kernel trick. The most commonly used kernel transformations are the polynomial kernel and the radial kernel.
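To make the kernel idea concrete, here is a small added illustration (not part of the original document): caret's SVM methods rely on the kernlab package, whose kernel constructors can be evaluated directly on a pair of observations.
# Added illustration: evaluate two common kernels on a pair of points
library(kernlab)
x1 <- c(1, 2)
x2 <- c(2, 3)
rbf <- rbfdot(sigma = 0.5)   # radial kernel: exp(-sigma * ||x1 - x2||^2)
poly <- polydot(degree = 2)  # polynomial kernel: (scale * <x1, x2> + offset)^degree
rbf(x1, x2)
poly(x1, x2)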
Note that there is also an extension of SVM for regression, called support vector regression.
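For completeness, here is a minimal support vector regression sketch (an addition, assuming the Boston housing data loaded earlier and the kernlab package installed; model.svr is a hypothetical name). caret's "svmRadial" method also accepts a numeric outcome:
# Hedged sketch: support vector regression of median home value on all predictors
data("Boston", package = "MASS")
set.seed(123)
model.svr <- train(
  medv ~ ., data = Boston, method = "svmRadial",
  trControl = trainControl("cv", number = 5),
  preProcess = c("center", "scale"),
  tuneLength = 5
)
# In-sample RMSE as a quick sanity check
RMSE(predict(model.svr, Boston), Boston$medv)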
library(tidyverse)
library(caret)
Data set: PimaIndiansDiabetes2 [in mlbench package]
# Load the data
data("PimaIndiansDiabetes2", package = "mlbench")
pima.data.cleaned <- na.omit(PimaIndiansDiabetes2)
# Split the data into training and test set
set.seed(123)
t.sample <- pima.data.cleaned$diabetes %>% createDataPartition(p = 0.8, list = FALSE)
train.pima.indians <- pima.data.cleaned[t.sample, ]
test.pima.indians <- pima.data.cleaned[-t.sample, ]
In the following example, the variables are normalized to make their scales comparable. This is done automatically before building the SVM classifier by setting the option preProcess = c("center", "scale").
# Fit the model on the training set
set.seed(123)
model.svm <- train(
  diabetes ~ ., data = train.pima.indians, method = "svmLinear",
  trControl = trainControl("cv", number = 10),
  preProcess = c("center", "scale")
)
# Make predictions on the test data
predicted.classes.svm <- model.svm %>% predict(test.pima.indians)
head(predicted.classes.svm)
## [1] neg pos neg pos pos neg
## Levels: neg pos
# Compute model accuracy rate
mean(predicted.classes.svm == test.pima.indians$diabetes)
## [1] 0.7820513
Note that there is a tuning parameter C, also known as Cost, that controls the tolerance for misclassification. It essentially imposes a penalty on the model for each training error: the higher the value of C, the less likely the SVM algorithm is to misclassify a training point.
By default, caret builds the SVM linear classifier using C = 1. You can check this by printing the model object (model.svm) in the R console:
model.svm
## Support Vector Machines with Linear Kernel
##
## 314 samples
## 8 predictor
## 2 classes: 'neg', 'pos'
##
## Pre-processing: centered (8), scaled (8)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 314, 314, 314, 314, 314, 314, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7862095 0.4893315
##
## Tuning parameter 'C' was held constant at a value of 1
The following R code computes the SVM classifier over a grid of values of C and automatically selects the final model used for predictions:
# Fit the model on the training set
set.seed(123)
model.svm2 <- train(
  diabetes ~ ., data = train.pima.indians, method = "svmLinear",
  trControl = trainControl("cv", number = 10),
  tuneGrid = expand.grid(C = seq(0.1, 2, length = 20)),
  preProcess = c("center", "scale")
)
# Plot model accuracy vs different values of Cost
plot(model.svm2)
# Print the best tuning parameter C that maximizes model accuracy
model.svm2$bestTune
Let’s use the best model
# Make predictions on the test data
predicted.classes.svm2 <- model.svm2 %>% predict(test.pima.indians)
# Compute model accuracy rate
mean(predicted.classes.svm2 == test.pima.indians$diabetes)
## [1] 0.7820513
To build a non-linear SVM classifier, we can use either a polynomial kernel or a radial kernel function.
# Fit the model on the training set
set.seed(123)
model.svm.radial <- train(
  diabetes ~ ., data = train.pima.indians, method = "svmRadial",
  trControl = trainControl("cv", number = 10),
  preProcess = c("center", "scale"),
  tuneLength = 10
)
# Print the best tuning parameters sigma and C that maximize model accuracy
model.svm.radial$bestTune
# Make predictions on the test data
predicted.classes.svm.radial <- model.svm.radial %>% predict(test.pima.indians)
# Compute model accuracy rate
mean(predicted.classes.svm.radial == test.pima.indians$diabetes)
## [1] 0.7948718
# Fit the model on the training set
set.seed(123)
model.svm.polynomial <- train(
  diabetes ~ ., data = train.pima.indians, method = "svmPoly",
  trControl = trainControl("cv", number = 10),
  preProcess = c("center", "scale"),
  tuneLength = 4
)
# Print the best tuning parameters (degree, scale and C) that maximize model accuracy
model.svm.polynomial$bestTune
# Make predictions on the test data
predicted.classes.svm.polynomial <- model.svm.polynomial %>% predict(test.pima.indians)
# Compute model accuracy rate
mean(predicted.classes.svm.polynomial == test.pima.indians$diabetes)
## [1] 0.7948718
In our examples, the SVM classifiers using a non-linear kernel (accuracy ~0.795) give a slightly better result than the linear model (accuracy ~0.782). A more systematic comparison is sketched below.
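Since all three tuned models above were fit with the same seed and the same 10-fold cross-validation, their resampling results can be collected side by side with caret's resamples() helper (a sketch, assuming model.svm2, model.svm.radial and model.svm.polynomial are still in memory):
# Collect the cross-validation results of the three SVM models
models.compared <- resamples(list(
  linear = model.svm2,
  radial = model.svm.radial,
  polynomial = model.svm.polynomial
))
# Summarize accuracy and Kappa across the resamples
summary(models.compared)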
After building a predictive classification model, you need to evaluate its performance, that is, how well the model predicts the outcome of new observations (test data) that were not used to train the model.
The commonly used metrics and methods for assessing the performance of predictive classification models include:
Average classification accuracy - the proportion of correctly classified observations.
Confusion matrix - a 2x2 table showing four counts: the numbers of true positives, true negatives, false negatives and false positives.
Precision, Recall and Specificity - three major performance metrics describing a predictive classification model.
ROC curve - a graphical summary of the overall performance of the model, showing the proportion of true positives and false positives at all possible values of the probability cutoff. The Area Under the Curve (AUC) summarizes the overall performance of the classifier.
library(tidyverse)
library(caret)
To keep things simple, we’ll perform a binary classification, where the outcome variable can have only two possible values: negative vs. positive.
We’ll fit a linear discriminant analysis (LDA) model using the PimaIndiansDiabetes2 data set [mlbench package].
data("PimaIndiansDiabetes2", package = "mlbench")
pima.data.cleaned <- na.omit(PimaIndiansDiabetes2)
# Split the data into training and test set
set.seed(123)
t.sample <- pima.data.cleaned$diabetes %>% createDataPartition(p = 0.8, list = FALSE)
train.pima2 <- pima.data.cleaned[t.sample, ]
test.pima2 <- pima.data.cleaned[-t.sample, ]
library(MASS)
#Fit LDA
fit.lda <- lda(diabetes ~., data = train.pima2)
# Make predictions on the test data
predictions.pima2 <- predict(fit.lda, test.pima2)
predictions.probabilities2 <- predictions.pima2$posterior[, 2]
predicted.classes2 <- predictions.pima2$class
observed.classes2 <- test.pima2$diabetes
The overall classification accuracy rate corresponds to the proportion of observations that have been correctly classified. Determining the raw classification accuracy is the first step in assessing the performance of a model.
accuracy <- mean(observed.classes2 == predicted.classes2)
accuracy
## [1] 0.8076923
error <- mean(observed.classes2 != predicted.classes2)
error
## [1] 0.1923077
From the output above, the linear discriminant analysis correctly predicted the individual outcome in 81% of the cases. This is far better than random guessing. The misclassification error rate is 100% - 81% = 19%.
In our example, a binary classifier can make two types of errors: it can incorrectly assign a diabetes-positive individual to the negative category (a false negative), or it can incorrectly assign a diabetes-negative individual to the positive category (a false positive).
The proportion of these two types of errors can be determined by creating a confusion matrix, which compares the predicted outcome values against the known outcome values.
The R function table() can be used to produce a confusion matrix in order to determine how many observations were correctly or incorrectly classified. It compares the observed and the predicted outcome values and shows the number of correct and incorrect predictions categorized by type of outcome.
# Confusion matrix, number of cases
table(observed.classes2, predicted.classes2)
## predicted.classes2
## observed.classes2 neg pos
## neg 48 4
## pos 11 15
# Confusion matrix, proportion of cases
table(observed.classes2, predicted.classes2) %>% prop.table() %>% round(digits = 3)
## predicted.classes2
## observed.classes2 neg pos
## neg 0.615 0.051
## pos 0.141 0.192
The diagonal elements of the confusion matrix indicate correct predictions, while the off-diagonal elements represent incorrect predictions. So the correct classification rate is the sum of the numbers on the diagonal divided by the sample size in the test data. In our example, that is (48 + 15)/78 = 81%.
Technically the raw prediction accuracy of the model is defined as (TruePositives + TrueNegatives)/SampleSize.
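As a quick sanity check, this can be computed directly from the confusion matrix built above (a one-off sketch using the objects already in memory):
# Recompute the accuracy from the confusion matrix itself
conf.tab <- table(observed.classes2, predicted.classes2)
sum(diag(conf.tab)) / sum(conf.tab)
# Should reproduce the ~0.808 accuracy computed earlier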
In addition to the raw classification accuracy, there are many other metrics that are widely used to examine the performance of a classification model, including:
Precision, which is the proportion of true positives among all the individuals that have been predicted to be diabetes-positive by the model. This represents the accuracy of a predicted positive outcome. Precision = TruePositives/(TruePositives + FalsePositives).
Sensitivity (or Recall), which is the True Positive Rate (TPR), that is the proportion of identified positives among the diabetes-positive population. Sensitivity = TruePositives/(TruePositives + FalseNegatives).
Specificity, which measures the True Negative Rate (TNR), that is the proportion of identified negatives among the diabetes-negative population. Specificity = TrueNegatives/(TrueNegatives + FalsePositives).
False Positive Rate (FPR), which represents the proportion of individuals identified as positive among the healthy (diabetes-negative) individuals. This can be seen as a false alarm rate. The FPR can also be calculated as 1 - specificity. When positives are rare, even a modest FPR can mean that a predicted positive is most likely a negative. These formulas are checked numerically just below.
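As a check on these formulas, the metrics can be computed by hand from the confusion matrix counts above (48 true negatives, 4 false positives, 11 false negatives, 15 true positives):
# Verify the formulas using the confusion matrix counts from above
TP <- 15; TN <- 48; FP <- 4; FN <- 11
precision   <- TP / (TP + FP)   # 15/19 ~ 0.789
sensitivity <- TP / (TP + FN)   # 15/26 ~ 0.577
specificity <- TN / (TN + FP)   # 48/52 ~ 0.923
fpr         <- 1 - specificity  #  4/52 ~ 0.077
c(precision = precision, sensitivity = sensitivity,
  specificity = specificity, FPR = fpr)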
confusionMatrix(predicted.classes2, observed.classes2, positive = "pos")
## Confusion Matrix and Statistics
##
## Reference
## Prediction neg pos
## neg 48 11
## pos 4 15
##
## Accuracy : 0.8077
## 95% CI : (0.7027, 0.8882)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.004396
##
## Kappa : 0.5361
##
## Mcnemar's Test P-Value : 0.1213
##
## Sensitivity : 0.5769
## Specificity : 0.9231
## Pos Pred Value : 0.7895
## Neg Pred Value : 0.8136
## Prevalence : 0.3333
## Detection Rate : 0.1923
## Detection Prevalence : 0.2436
## Balanced Accuracy : 0.7500
##
## 'Positive' Class : pos
##
In our example, the sensitivity is ~58%, that is, the proportion of diabetes-positive individuals that were correctly identified by the model as diabetes-positive. The specificity of the model is ~92%, that is, the proportion of diabetes-negative individuals that were correctly identified as diabetes-negative. The model precision, or positive predictive value, is ~79%.
The ROC curve (or receiver operating characteristic curve) is a popular graphical measure for assessing the performance or the accuracy of a classifier. It plots the true positive rate against the false positive rate at all possible values of the probability cutoff, and the area under the curve (AUC) summarizes the overall performance of the classifier.
The ROC analysis can be easily performed using the R package pROC:
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:glmnet':
##
## auc
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
res.roc <- roc(observed.classes2, predictions.probabilities2)
## Setting levels: control = neg, case = pos
## Setting direction: controls < cases
plot.roc(res.roc, print.auc = TRUE)
# Extract some interesting results
roc.data <- tibble(
  thresholds = res.roc$thresholds,
  sensitivity = res.roc$sensitivities,
  specificity = res.roc$specificities
)
# Get the probability thresholds achieving a specificity of at least 0.6
roc.data %>% filter(specificity >= 0.6)
plot.roc(res.roc, print.auc = TRUE, print.thres = "best")
plot.roc(res.roc, print.thres = c(0.3,0.5,0.7))
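Finally, the AUC and the coordinates of the "best" threshold flagged in the plot above can be extracted directly from the roc object; both helpers below are part of pROC:
# Area under the ROC curve as a single number
auc(res.roc)
# Sensitivity and specificity at the "best" threshold
coords(res.roc, "best", ret = c("threshold", "specificity", "sensitivity"))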